#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@Author:张广勤
@Web site: https://www.tunan.wang
@Github:www.github.com
 
@File:chinese_brackets_set1_0.py
@Time:2024/5/10 8:53

@Motto:不积跬步无以至千里，不积小流无以成江海！
"""
import os
import csv
import re

def cctv_news_topic(folder_path: str = './news') -> list:
    """Collect the distinct labels written as 【...】 in a folder of CSV files.

    Every entry in ``folder_path`` whose name ends with ``.csv`` is opened as
    a UTF-8 CSV file, and each cell is scanned for text enclosed in the
    full-width brackets ``【`` and ``】``. A file that cannot be opened,
    decoded or parsed is reported on stdout and skipped; matches gathered
    from it before the failure are kept.

    Args:
        folder_path: Directory to scan (sub-directories are not descended
            into). Defaults to ``./news``.

    Returns:
        The unique bracketed strings, without the brackets, as a sorted list
        so that the result is the same on every run.

    Raises:
        OSError: If ``folder_path`` itself cannot be listed, e.g. it does
            not exist.
    """
    # The captured text must exclude the full-width brackets themselves;
    # otherwise a cell holding several labels ("【A】x【B】") is swallowed as
    # one greedy match ("A】x【B") instead of yielding "A" and "B".
    # Compiled once here rather than on every cell.
    topic_pattern = re.compile(r'【([^【】]*)】')
    topics = set()

    for filename in os.listdir(folder_path):
        if not filename.endswith('.csv'):
            continue
        file_path = os.path.join(folder_path, filename)

        try:
            # newline='' is what the csv module expects so that it can
            # handle line breaks embedded in quoted fields itself.
            with open(file_path, 'r', encoding='utf-8', newline='') as csvfile:
                for row in csv.reader(csvfile):
                    for cell in row:
                        topics.update(topic_pattern.findall(cell))
        except (OSError, UnicodeError, csv.Error) as e:
            # Best effort: one unreadable file must not abort the whole scan.
            print(f"Error reading {file_path}: {e}")

    # A set has no defined order; sort so callers get a stable list.
    return sorted(topics)

if __name__ == "__main__":
    # Script entry point: gather the bracketed labels and show them.
    topics = cctv_news_topic()
    print(topics)